import pandas as pd
from numpy import array,asarray,zeros
from keras.preprocessing.text import one_hot,Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense,SpatialDropout1D,LSTM,Flatten
from keras.layers.embeddings import Embedding
import re 
from sklearn.model_selection import train_test_split


# Load the training split of the ChnSentiCorp corpus (tab-separated, first row
# is the header) and preview it.
data_train = pd.read_csv("./ChnSentiCorp/train.tsv", sep="\t", header=0)
print(data_train.head(5))

# Keep only the columns used below. Select with a list, not a set: a set has
# no defined order (so the column order would be arbitrary) and pandas >= 2.0
# rejects set indexers outright.
data_train = data_train[["label", "text_a"]]
print(data_train.head(5))

# Upper bound on the vocabulary size kept by the tokenizer.
maxFeatures = 2000

# The original passed split="", which makes Keras call str.split("") and fail
# with "ValueError: empty separator".
# NOTE(review): assumes per-character tokenization was the intent (the text is
# Chinese, with no whitespace between words) -- confirm; char_level=True
# treats every character as a token.
tokenizer = Tokenizer(num_words=maxFeatures, char_level=True)

# fit_on_texts() needs the corpus and updates the tokenizer in place, returning
# None -- so do not rebind `tokenizer` to its result. Cast to str so a
# non-string cell (e.g. a missing value) does not break tokenization.
tokenizer.fit_on_texts(data_train["text_a"].astype(str).tolist())

